library(MASS)
#Then use the chisq.test() function to carry out the test

# Frequencies can be entered more succinctly than the raw data (see the rep() calls below).
# chisq.test() returns the Pearson chi-square, which approximates the likelihood ratio chi-square.

# One record per respondent, expanded from the cell counts:
#   sex    takes the value 1 for the first 293 records and 2 for the next 182
#   region takes the values 1, 2, 3 with counts 199/27/67 (sex 1)
#          and 134/25/23 (sex 2)
sex <- rep(1:2, times = c(293, 182))
region <- rep(c(1, 2, 3, 1, 2, 3), times = c(199, 27, 67, 134, 25, 23))

# Pearson chi-square test of association on the 2 x 3 sex-by-region table
result <- chisq.test(table(sex, region))

# See where the difference is: expected counts, observed counts, test summary
result$expected
result$observed
result

# Pearson residuals (O - E) / sqrt(E) and their two-sided standard normal
# p-values
z <- (result$observed - result$expected) / sqrt(result$expected)
pout <- 2 * pnorm(-abs(z), mean = 0, sd = 1)
z
pout

# The standardised residual additionally adjusts for sample size: O - E is
# divided by sqrt(E * (1 - r) * (1 - c)), where r and c are the row and
# column proportions. It is already stored on the fitted test object.
result$stdres

# If any expected counts are less than 5, use Fisher's exact test
fisher.test(table(sex, region))

# 2 x 2 table for McNemar's test, filled column by column:
#        [,1] [,2]
#   [1,]   19   10
#   [2,]    2    9
X <- matrix(c(19, 2, 10, 9), nrow = 2, ncol = 2)

# McNemar's chi-square compares the off-diagonal cells (10 vs 2);
# correct = TRUE applies the continuity correction.
mcnemar.test(X, correct = TRUE)
# check the table was entered correctly (Z, printed below, is a 3x3 table)
# 3 x 3 table of counts for the kappa test, filled column by column:
#        [,1] [,2] [,3]
#   [1,]   15    1    0
#   [2,]    2    3    1
#   [3,]    3    2    3
Z <- matrix(c(15, 2, 3, 1, 3, 2, 0, 1, 3), nrow = 3, ncol = 3)
Z
#install.packages("fmsb")
library(fmsb)

# Kappa statistic for the table with a 95% confidence interval; y = NULL
# because the full contingency table is passed as the first argument.
Kappa.test(Z, y = NULL, conf.level = 0.95)

# Binary outcome laid out to line up with `sex` record by record:
# 100 zeros then 193 ones, followed by 82 zeros then 100 ones.
# NOTE(review): assumes `sex` still holds 293 ones followed by 182 twos.
region2 <- rep(c(0, 1, 0, 1), times = c(100, 193, 82, 100))

# Logistic regression of the binary outcome on sex (printed, not stored)
glm(formula = region2 ~ sex, family = binomial)

# Odds ratio: exponentiated sex coefficient, typed in from the output above
exp(-0.459)

# The same odds ratio as the cross-product ratio of the 2 x 2 table
table(region2, sex)
(100 * 100) / (82 * 193)

# fit a log-linear model using Poisson model which generalises
# to factors with 3 or more levels
# region3 indicates whether you live in the south of the state (coded 1 = no, 2 = yes, matching region2 = 0/1 above)

# Cell counts of the 2 x 2 table in "long" form, one row per cell:
#   freq  region3  sex3
#    100     1       1
#     82     1       2
#    193     2       1
#    100     2       2
freq <- c(100, 82, 193, 100)

# Cell labels: region3 varies slowest, sex3 varies fastest
region3 <- rep(c(1, 2), each = 2)
sex3 <- rep(c(1, 2), times = 2)

# Logistic regressions for the binary outcome region2:
# intercept-only model, and the model with sex as the single predictor
Modelnosex <- glm(formula = region2 ~ 1, family = binomial)
Modelsex <- glm(formula = region2 ~ sex, family = binomial)
#anova(Modelint,Modelnoint)

# Analysis of deviance: the change in deviance between the two fits is the
# likelihood ratio chi-square for sex
anova(Modelsex, Modelnosex)

# Coefficient table for the model with sex
summary(Modelsex)

# Note: the variables have to be converted into factors (as below) because they take only a small, finite set of values;
# this matters when a variable has 3 or more levels

# Poisson log-linear models for the cell counts: main effects only, and
# main effects plus the sex-by-region interaction
Modelnoint <- glm(
  formula = freq ~ as.factor(sex3) + as.factor(region3),
  family = poisson
)
Modelint <- glm(
  formula = freq ~ as.factor(sex3) + as.factor(region3) +
    as.factor(sex3):as.factor(region3),
  family = poisson
)

# The likelihood ratio chi-square for gender differences in region is
# identical whether it comes from the Poisson or the binomial models
anova(Modelint, Modelnoint)

# Fitted probabilities from the logistic model, cross-tabulated against sex
table(Modelsex$fitted.values, sex)

# Variance-covariance matrix of the (intercept, sex) coefficient estimates
Vout <- vcov(Modelsex)

# Variance of the fitted log-odds for sex = 1 (intercept + 1 * slope),
# i.e. the quadratic form a' V a with a = (1, 1).
# t() is R's transpose; `^T` is not a transpose operator. It raises the
# vector to the power of the logical constant T and only happened to work
# because T is TRUE (= 1) unless something reassigns it.
t(c(1, 1)) %*% Vout %*% c(1, 1)

# Print the fitted model (coefficients and deviances)
Modelsex

# 95% CI for the odds for males (sex=1) = exp((1.1166 - 0.4591) +/- 1.96 x sqrt(0.01518)), where 0.01518 (= 1/100 + 1/193) is the variance computed above

# can obtain this same variance for how much more likely you are to live in the south if
# you are a male by recoding sex so that males take the value 0 (and females the value 1) as opposed
# to the values 1 for males and 2 for females used earlier

# Shift the 1/2 coding of sex down to 0/1 and refit the logistic model
# (this overwrites the earlier Modelsex)
sex01 <- sex - 1
Modelsex <- glm(formula = region2 ~ sex01, family = binomial)

# With the 0/1 coding the intercept is the log-odds for sex01 = 0 (males),
# so the top left-hand entry of the variance-covariance matrix is the
# variance for males. It agrees with the quadratic form from the earlier
# logistic regression (1/100 + 1/193, about 0.01518).
vcov(Modelsex)

# Fitted probabilities by sex
table(Modelsex$fitted.values, sex)

# Inverse logit of the intercept: fitted probability for sex01 = 0
exp(0.6575) / (1 + exp(0.6575))

# Inverse logit of intercept + slope: fitted probability for sex01 = 1
exp(0.6575 - 0.4591) / (1 + exp(0.6575 - 0.4591))